name = "Max"
def changeName():
    """Bind a function-local ``name`` and print its value and identity.

    The assignment inside the function creates a new local variable; it does
    not rebind any module-level variable that happens to share the name.

    Returns:
        None. The result is reported via ``print`` only.
    """
    name = "Niko"  # local to this call; a global ``name`` is left untouched
    print(f"name inside the function: {name}, address = {id(name)}")
changeName()
print(f"name outside of the function: {name}, address = {id(name)}")
name inside the function: Niko, address = 4388913008 name outside of the function: Max, address = 4386289840
def add(x, y, z=0):
    """Return the sum of two or three values.

    Parameters:
        x: The first addend.
        y: The second addend.
        z: Optional third addend; defaults to 0 so it can be omitted.

    Returns:
        The result of ``x + y + z``.
    """
    return x + y + z
print(add(1, 2))
print(add(1, y=2, z=3))
3 6
def add(x, y, z=0):
    """
    Calculate the sum of up to three numbers.

    Parameters:
        x (int/float): The first number to be added.
        y (int/float): The second number to be added.
        z (int/float, optional): The third number to be added.
            Defaults to 0 if not provided.

    Returns:
        int/float: The sum of the numbers.
    """
    # z is optional; its default of 0 leaves the two-argument sum unchanged
    return x + y + z
math in the same script?
If you run
import myModule
and then update myModule and run import myModule again in a Jupyter notebook, the module will not be updated. You will need to run
from importlib import reload
reload(myModule)
df where age is greater than 30, which command would you use?
import pandas as pd
data = {
'name': ['Alice', 'Bob', 'Charlie', 'David'],
'age': [25, 30, 35, 40],
'height': [165.4, 175.3, 168.5, 180.6]
}
df = pd.DataFrame(data)
print(df)
df[df['age'] > 30]
name age height 0 Alice 25 165.4 1 Bob 30 175.3 2 Charlie 35 168.5 3 David 40 180.6
| | name | age | height |
|---|---|---|---|
| 2 | Charlie | 35 | 168.5 |
| 3 | David | 40 | 180.6 |
df, which method should you use?
If you don't specify the key columns, it renames the rows
"MVR???A"
"ATG???TAG"
!grep "furniture.*sell" ../downloads/blocket_listings.txt
desk furniture sell 2000 2018-01-14 couch furniture sell 500 2018-10-05 shoerack furniture sell 200 2018-10-24 wardrobe furniture sell 300 2018-10-23
Building blocks for creating patterns
. matches any character (once)
? repeat previous pattern 0 or 1 times
* repeat previous pattern 0 or more times
+ repeat previous pattern 1 or more times
colour.*
.* matches everything (including the empty string)!
salt?peter
"salt?pet.."
\w matches any letter or number, and the underscore
\d matches any digit
\D matches any non-digit
\s matches any whitespace (spaces, tabs, ...)
\S matches any non-whitespace
\w matches any letter or number, and the underscore
\d matches any digit
\D matches any non-digit
\s matches any whitespace (spaces, tabs, ...)
\S matches any non-whitespace
\w+
\w matches any letter or number, and the underscore
\d matches any digit
\D matches any non-digit
\s matches any whitespace (spaces, tabs, ...)
\S matches any non-whitespace
\d+
\w matches any letter or number, and the underscore
\d matches any digit
\D matches any non-digit
\s matches any whitespace (spaces, tabs, ...)
\S matches any non-whitespace
\s+
\w matches any letter or number, and the underscore
\d matches any digit
\D matches any non-digit
\s matches any whitespace (spaces, tabs, ...)
\S matches any non-whitespace
[abc] matches a single character defined in this set {a, b, c}
[^abc] matches a single character that is not a, b or c
salt?pet[er]+
1 920760 rs80259304 T C . PASS AA=T;AC=18;AN=120;DP=190;GP=1:930897;BN=131 GT:DP:CB 0/1:1:SM 0/0:4/SM...
0/0 0/1 1/1 ...
"[01]/[01]" (or "\d/\d")
\s[01]/[01]:
Example - finding patterns in vcf
1 920760 rs80259304 T C . PASS AA=T;AC=18;AN=120;DP=190;GP=1:930897;BN=131 GT:DP:CB 0/1:1:SM 0/0:4/SM...
... 1/1:... ... 1/1:... ...
.*1/1.*1/1.*
.*\s1/1:.*\s1/1:.*
. matches any character (once)
? repeat previous pattern 0 or 1 times
* repeat previous pattern 0 or more times
+ repeat previous pattern 1 or more times
\w matches any letter or number, and the underscore
\d matches any digit
\D matches any non-digit
\s matches any whitespace (spaces, tabs, ...)
\S matches any non-whitespace
[abc] matches a single character defined in this set {a, b, c}
[^abc] matches a single character that is not a, b or c
[a-z] matches any (lowercased) letter from the english alphabet
.* matches anything
# Import module
import re
# Define a pattern
p = re.compile('ab*')
p
re.compile(r'ab*', re.UNICODE)
# Search pattern in string
p = re.compile('ab*')
p.search('abc')
<re.Match object; span=(0, 2), match='ab'>
print(p.search('cb'))
None
p = re.compile('HELLO')
m = p.search('gsdfgsdfgs HELLO __!@£§≈[|ÅÄÖ‚…’fi]')
print(m)
<re.Match object; span=(12, 17), match='HELLO'>
# Remember, [a-z]+ matches any lower case english word
p = re.compile('[a-z]+')
result = p.search('ATGAAA')
print(result)
None
p = re.compile('[a-z]+', re.IGNORECASE)
result = p.search('ATGAAA')
result
<re.Match object; span=(0, 6), match='ATGAAA'>
p = re.compile('[a-z]+', re.IGNORECASE)
result = p.search('123 ATGAAA 456')
result
<re.Match object; span=(4, 10), match='ATGAAA'>
result.group(): Return the string matched by the expression
result.start(): Return the starting position of the match
result.end(): Return the ending position of the match
result.span(): Return both (start, end)
result.group()
'ATGAAA'
result.start()
4
result.end()
10
result.span()
(4, 10)
p = re.compile('.*HELLO.*')
m = p.search('lots of text HELLO more text and characters!!! ^^')
m.group()
'lots of text HELLO more text and characters!!! ^^'
The * is greedy.
# Find all instances of the defined pattern
p = re.compile('HELLO')
matches = p.finditer('lots of text HELLO more text HELLO ... and characters!!! ^^')
print(matches)
<callable_iterator object at 0x7ff202b6efa0>
# Loop through matches
for match in matches:
print(f'Found {match.group()} at position {match.start()}')
Found HELLO at position 14 Found HELLO at position 32
txt = "The first full stop is here: ."
pattern = re.compile('.')
match = pattern.search(txt)
print('"{}" at position {}'.format(match.group(), match.start()))
"T" at position 0
# Print all matches of the single-character pattern in txt.
# Iterate with `pattern` (the '.' regex compiled for this example); `p` still
# holds an unrelated pattern from an earlier cell and would match nothing here.
matches = pattern.finditer(txt)
# Loop intentionally left commented out: '.' matches every character, so the
# output is one line per character of txt.
#for match in matches:
#    print('"{}" at position {}'.format(match.group(), match.start()))
# Use escape character to search for a literal full stop.
# The pattern is a raw string so the backslash reaches the regex engine
# unchanged; '\.' in a normal string literal is an invalid escape sequence.
p = re.compile(r'\.')
m = p.search(txt)
print('"{}" at position {}'.format(m.group(), m.start()))
"." at position 29
\ escaping a character
^ beginning of the string
$ end of string
| boolean or
^hello$
salt?pet(er|re) | nit(er|re) | KNO3
txt = "Do it becuase I say so, not becuase you want!"
# Spell the word because correctly
import re
p = re.compile('becuase')
txt = p.sub('because', txt)
print(txt)
Do it because I say so, not because you want!
# Remove additional spaces: collapse every run of whitespace to one space.
# Raw string keeps \s intact for the regex engine ('\s' in a normal string
# literal is an invalid escape sequence).
p = re.compile(r'\s+')
p.sub(' ', txt)
'Do it because I say so, not because you want!'
Construct regular expressions
p = re.compile()
Searching
p.search(text)
Substitution
p.sub(replacement, text)
Typical code structure:
# Template: compile once, search, then branch on whether a match was found.
pattern = re.compile( ... )
match = pattern.search('string goes here')
# search() returns None when nothing matches, so test the same variable
# that is used in the branch below.
if match:
    print('Match found: ', match.group())
else:
    print('No match')
# Process the file line by line; the with-block closes the handle when the
# loop finishes or if do_stuff raises.
with open('myfile.txt') as fh:
    for line in fh:
        do_stuff(line)
# Count the lines read and accumulate whatever do_stuff yields for each one.
iterations = 0
information = []
# The with-block closes the handle even if do_stuff raises part-way through.
with open('myfile.txt', 'r') as fh:
    for line in fh:
        iterations += 1
        # NOTE(review): += extends the list, so do_stuff is presumably
        # expected to return an iterable of items — confirm.
        information += do_stuff(line)
Base types:
str "hello"
int 5
float 5.2
bool True
Collections:
list ["a", "b", "c"]
dict {"a": "alligator", "b": "bear", "c": "cat"}
tuple ("this", "that")
set {"drama", "sci-fi"}
iterations = 0
score = 5.2
+, -, *,... # mathematical
and, or, not # logical
==, != # (in)equality
<, >, <=, >= # comparison
in # membership
value = 4
nextvalue = 1
nextvalue += value
print('nextvalue: ', nextvalue, 'value: ', value)
nextvalue: 5 value: 4
x = 5
y = 7
z = 2
x > 6 and y == 7 or z > 1
True
(x > 6 and y == 7) or z > 1
True
Works like a list of characters
mystr = "one"
mystr += " two" # string concatenation
mystr
'one two'
len(mystr) # get the length
7
"one" in mystr # membership checking
True
mystr = "one"
mystr[1] = "W"
--------------------------------------------------------------------------- TypeError Traceback (most recent call last) Cell In[27], line 2 1 mystr = "one" ----> 2 mystr[1] = "W" TypeError: 'str' object does not support item assignment
mystr = "one"
print(mystr)
mystr = "two"
print(mystr)
one two
mystr = "one"
print(f"mystr = {mystr}, address = {id(mystr)}")
mystr = "two"
print(f"mystr = {mystr}, address = {id(mystr)}")
mystr = one, address = 4330374000 mystr = two, address = 4414743344
s.strip() # remove unwanted spacing
s.split() # split line into columns
s.upper(), s.lower() # change the case
p = re.compile('A.A.A')
p.search(dnastring)
p = re.compile('T')
p.sub('U', dnastring)
import re
# Raw string so that \s is passed to the regex engine as written; in a normal
# string literal '\s' is an invalid escape sequence.
# '.*' is greedy: it extends to the LAST whitespace-then-'p' in the text.
p = re.compile(r'p.*\sp') # the greedy star!
p.search('a python programmer writes python code').group()
'python programmer writes p'
Can contain strings, integer, booleans...
Mutable: you can add, remove, change values
Lists:
mylist.append('value')
Dicts:
mydict['key'] = 'value'
Sets:
myset.add('value')
Test for membership:
value in myobj
Check size:
len(myobj)
todolist = ["work", "sleep", "eat", "work"]
todolist.sort()
todolist.reverse()
todolist[2]
todolist[-1]
todolist[2:6]
todolist = ["work", "sleep", "eat", "work"]
todolist.sort()
print(todolist)
['eat', 'sleep', 'work', 'work']
todolist.reverse()
print(todolist)
['work', 'work', 'sleep', 'eat']
todolist[2]
'sleep'
todolist[-1]
'eat'
todolist[2:]
['sleep', 'eat']
mydict = {"a": "alligator", "b": "bear", "c": "cat"}
counter = {"cats": 55, "dogs": 8}
mydict["a"]
mydict.keys()
mydict.values()
# Tally how many animals in the list are cats and how many are anything else.
counter = {'cats': 0, 'others': 0}
for animal in ['zebra', 'cat', 'dog', 'cat']:
    if animal == 'cat':
        counter['cats'] += 1
    else:
        counter['others'] += 1
counter
{'cats': 2, 'others': 2}
Bag of values
No order
No duplicates
Fast membership checks
Logical set operations (union, difference, intersection...)
myset = {"drama", "sci-fi"}
myset.add("comedy")
myset.remove("drama")
todolist = ["work", "sleep", "eat", "work"]
todo_items = set(todolist)
todo_items
{'eat', 'sleep', 'work'}
todo_items.add("study")
todo_items
{'eat', 'sleep', 'study', 'work'}
todo_items.add("eat")
todo_items
{'eat', 'sleep', 'study', 'work'}
tup = (max_length, sequence)
length = tup[0] # get content at index 0
tup = (2, 'xy')
tup[0]
2
tup[0] = 2
--------------------------------------------------------------------------- TypeError Traceback (most recent call last) <ipython-input-54-874559a0c62a> in <module> ----> 1 tup[0] = 2 TypeError: 'tuple' object does not support item assignment
def find_longest_seq(file):
    """Return a ``(length, sequence)`` pair as a single tuple.

    Placeholder example: the body that would compute ``length`` and
    ``sequence`` is elided. The point illustrated is that returning two
    comma-separated values hands the caller one tuple, which can be indexed
    or unpacked.

    Parameters:
        file: Input to search; presumably a path or open file — the
            elided body would determine this.
    """
    # some code here...
    return length, sequence
answer = find_longest_seq(filepath)
print('length', answer[0])
print('sequence', answer[1])
answer = find_longest_seq(filepath) # return as a tuple
length, sequence = find_longest_seq(filepath) # return as two variables
# Branches are tested top to bottom; only the first true one runs, so
# 'medium' is reached only when count is in the range 6..10.
if count > 10:
    print('big')
elif count > 5:
    print('medium')
else:
    print('small')
shopping_list = ['bread', 'egg', ' butter', 'milk']
tired = True
# The list has exactly four items, so the first test (> 4) is false; the
# second is false because tired is True, leaving the else branch.
if len(shopping_list) > 4:
    print('Really need to go shopping!')
elif not tired:
    print('Not tired? Then go shopping!')
else:
    print('Better to stay at home')
Better to stay at home
# Read the file line by line, routing comment lines to use_comment and
# collecting the data from every other line.
information = []
# The with-block closes the handle even if a helper raises.
with open('myfile.txt', 'r') as fh:
    for line in fh:
        if is_comment(line):
            use_comment(line)
        else:
            # Accumulate rather than overwrite, so earlier lines are kept.
            # NOTE(review): assumes read_data returns an iterable of items,
            # as += extends the list — confirm against read_data.
            information += read_data(line)
# Walk through `lines` by index until check_something signals the end.
keep_going = True
information = []
index = 0
while keep_going:
    # NOTE(review): index is never bounds-checked; if check_something is
    # never true, lines[index] will eventually fail — confirm that the
    # input always contains a terminating line.
    current_line = lines[index]
    information += read_line(current_line)
    index += 1
    if check_something(current_line):
        keep_going = False
For loop
is a control flow statement that performs operations over a known amount of steps.
While loop
is a control flow statement that allows code to be executed repeatedly based on a given Boolean condition.
Which one to use?
For loops - standard for iterations over lists and other iterable objects
While loops - more flexible and can iterate an unspecified number of times
user_input = "thank god it's friday"
# Iterating over a string yields one character at a time.
for letter in user_input:
    print(letter.upper())
T H A N K G O D I T ' S F R I D A Y
# Same traversal written as a while loop with an explicit index.
i = 0
while i < len(user_input):
    letter = user_input[i]
    print(letter.upper())
    i += 1  # advance the index; without this the loop never terminates
T H A N K G O D I T ' S F R I D A Y
break - stop the loop
continue - go on to the next iteration
user_input = "thank god it's friday"
for letter in user_input:
if letter == 'd':
break
print(letter.upper())
T H A N K G O
Watch out!
# DON'T RUN THIS
i = 0
-while i < 10:
print(user_input[i])
Cell In[31], line 3 -while i < 10: ^ SyntaxError: invalid syntax
While loops may be infinite!
In:
fh = open(filename, 'r')
for line in fh:
fh.read()
fh.readlines()
sys.argv[1:]
Out:
fh = open(filename, 'w')
fh.write(text)
print('my_information')
None)
def prettyprinter(name, value, delim=":", end=None):
out = "The " + name + " is " + delim + " " + value
if end:
out += end
return out
None)
Any longer pieces of code that have been used and will be re-used should be saved
Save it as a file .py
To run it:
python3 mycode.py
or python mycode.py
Import it:
import mycode
""" This is a doc-string explaining what the purpose of this function/module is """
# This is a comment that helps understanding the code
Endless possibilities!